{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "##    softmax 以及损失计算\n",
    "    \n",
    "##### 1.本文主要讲述 ：CrossEntropyLoss() 和 NLLLoss() 的区别以及他们和Softmax的关系\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 本文头文件导入\n",
    "import torch\n",
    "import torch.nn as nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.1342, -2.5835, -0.9810],\n",
       "        [ 0.1867, -1.4513, -0.3225],\n",
       "        [ 0.6272, -0.1120,  0.3048]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#  输出矩阵，假设输入3张图片，分3类，最后的输出是一个3*3的tensor\n",
    "#  第1，2，3行分别是 第1，2，3张图片的结果，第1，2，3列分别是3个分类的得分，如1，2，3列，分别是猫、狗和猪的分类得分\n",
    "\n",
    "input = torch.randn(3,3) # 随机产生一个3*3的矩阵，tensor类型\n",
    "input\n",
    "\n",
    "\n",
    "input = torch.tensor([[-0.1342,-2.5835,-0.9810],\n",
    "                     [0.1867,-1.4513,-0.3225],\n",
    "                      [0.6272,-0.1120,0.3048]          \n",
    "                     ])\n",
    "input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0.6600, 0.0570, 0.2830],\n",
       "        [0.5570, 0.1083, 0.3347],\n",
       "        [0.4542, 0.2169, 0.3290]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对每一行使用softmax，可得到每张图片的概率分布\n",
    "sm = nn.Softmax(dim=1)\n",
    "sm(input)\n",
    "\n",
    "# dim = 1 表示按行计算softmax\n",
    "# dim = 0 表示按列计算softmax\n",
    "# 结果解释，可以看出3张图片，最有可能的标签分别是，猪，狗，猪"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.4155, -2.8648, -1.2623],\n",
       "        [-0.5852, -2.2232, -1.0944],\n",
       "        [-0.7893, -1.5285, -1.1117]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input = torch.log(sm(input)) \n",
    "input\n",
    "# 对每个分数 求 ln()，因为每个分数的概率本来是小于1大于0的。所以求ln后都变成了(0,-无穷)的值了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0128333333333333"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NLLLoss的结果就是把上面的输出  与  Label对应的那个值拿出来，再去掉负号，再求均值。\n",
    "\n",
    "# 0猫、1狗 和2猪，表示标签的值\n",
    "# 假设我们现在的target是【0,2,1】（第一张图片是猫，第二张图片是猪，第三章是狗），第一行取第0列元素，第二行取第2列元素，第三行取第1列\n",
    "# 现在 取 [0.6703,1.2447,0.9952]\n",
    "\n",
    "(0.4155+1.0945+1.5285) / 3\n",
    "\n",
    "# 参考之前设定的 如1，2，3列，分别是猫、狗和猪的分类得分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.0128)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 验证NLLLoss\n",
    "\n",
    "loss = nn.NLLLoss()\n",
    "target = torch.tensor([0,2,1])\n",
    "loss(input,target)  # input = torch.randn(3,3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.0128)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input = torch.tensor([[-0.1342,-2.5835,-0.9810],\n",
    "                     [0.1867,-1.4513,-0.3225],\n",
    "                      [0.6272,-0.1120,0.3048]          \n",
    "                     ])\n",
    "\n",
    "# crossEntropyLoss 等于  softmax-->max -->NLLLoss,将这几个过程合并起来了。\n",
    "loss = nn.CrossEntropyLoss()\n",
    "target = torch.tensor([0,2,1])\n",
    "loss(input,target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
